import pandas as pd

def read_file(filename,sheet_name):
    if filename.endswith('csv'):
        return pd.read_csv(filename)
    else: 
        return pd.read_excel(filename,sheet_name=sheet_name)

def file_data(file_info):
    data ={}
    # 遍历字典，读取文件，并赋值给相应的变量
    for var_name,info in file_info.items():
        data[var_name] = read_file(info['filename'],info['sheet_name'])
    return data


def price_deal(pricedata):
    low, high = map(float,pricedata.split('-'))
    return (low+high)/2



def deal_data(data):

    landarea = data['landarea']
    plant_plan = data['plant_plan']
    plant_data = data['plant_data']

    # 处理缺失值
    plant_plan['种植地块'] = plant_plan['种植地块'].fillna(method='ffill')
    # 对地块名称和类型之间映射及替换 | 添加新列
    land_mapping = dict(zip(landarea['地块名称'],landarea['地块类型']))
    plant_plan['地块类型']=[land_mapping.get(land_name) for land_name in plant_plan['种植地块']]
    
    # 删除含有nan的行
    plant_data = plant_data.dropna().copy()
    # 计算平均价格
    plant_data['平均销售单价'] = plant_data['销售单价/(元/斤)'].apply(price_deal)
    # 选取需要的列
    plant_useplan = plant_plan[['作物名称','地块类型','种植季次','种植面积/亩']]
    plant_data['except_area'] = plant_data['亩产量/斤']*plant_data['平均销售单价']-plant_data['种植成本/(元/亩)']
    plant_usedata = plant_data[['作物名称','地块类型','种植季次','亩产量/斤','种植成本/(元/亩)','平均销售单价','except_area']]
    plant_usedata[['作物名称', '地块类型', '种植季次']] = plant_usedata[['作物名称', '地块类型', '种植季次']].apply(lambda x: x.str.strip())
    return plant_useplan,plant_usedata


def calculate(plant_useplan,plant_usedata):
    # 合并两个数据集
    merged_plant = pd.merge(plant_useplan, plant_usedata, on=['作物名称', '地块类型'], how='left')
    # 计算利润
    merged_plant['利润']=merged_plant['种植面积/亩']*merged_plant['except_area']
    return merged_plant


def main():
    file_path = r'D:\ProgramFile2_OR\Study_Practice_Share\weblog\2024年全国大学生数学建模竞赛C题'
     # 定义文件名、sheet_name 和变量名的映射
    file_info = {
        'landarea': {'filename': file_path+'\附件1.xlsx', 'sheet_name': '乡村的现有耕地'},
        'plant_plan': {'filename': file_path+'\附件2.xlsx', 'sheet_name': '2023年的农作物种植情况'},
        'plant_data': {'filename': file_path+'\附件2.xlsx', 'sheet_name': '2023年统计的相关数据'}
    }
    data = file_data(file_info)
    plant_useplan,plant_usedata = deal_data(data)
    merged_plant = calculate(plant_useplan,plant_usedata)
    merged_plant.to_csv('merged_plant.csv',index=False)
    print(merged_plant['利润'].sum())
    

if __name__ == '__main__':
    main()